import pandas as pd
import numpy as np
import re
import os
from datetime import datetime
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise Plotly's offline notebook mode before any figure is shown.
init_notebook_mode(connected=False)
# Names of the per-city sub-directories expected under the data directory.
cities = 'Amsterdam LA Melbourne NYC Rio Tokyo Venice'.split()
# Base data directory relative to the current working directory
# (/data/ or /data-sample/).
data_dir = '/'.join([os.getcwd(), 'data', ''])
def get_dir(dir, group):
    '''Collect the paths of all regular files in each named sub-directory.

    Args:
        dir: Base data directory containing one sub-directory per entry of
            ``group``. (The name shadows the ``dir`` builtin; it is kept so
            existing keyword callers continue to work.)
        group: Iterable of sub-directory names to scan.

    Returns:
        A flat list of file paths. Sub-directories appear in the order given
        by ``group``; within a sub-directory, files are sorted by name.
        Nested directories are skipped, not descended into.

    Raises:
        OSError: Propagated from ``os.listdir`` when a sub-directory cannot
            be listed (for example, it does not exist).
    '''
    file_dirs = []
    for g in group:
        sub_dir = os.path.join(dir, g)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # result reproducible between runs and machines.
        for name in sorted(os.listdir(sub_dir)):
            path = os.path.join(sub_dir, name)
            if os.path.isfile(path):
                file_dirs.append(path)
    return file_dirs
def read_data(file_dirs, group):
    '''Read each CSV file and tag its rows with a snapshot date and a city.

    Args:
        file_dirs: Iterable of CSV file paths. The file name is expected to
            end with ``_<yy-mm-dd>.csv``; the part after the last underscore
            is parsed with the ``%y-%m-%d`` format.
        group: Collection of city names used to label each file.

    Returns:
        A list with one DataFrame per input file, each carrying two extra
        columns: ``date`` (parsed from the file name) and ``city``.

    Raises:
        ValueError: If the date token in a file name does not match
            ``%y-%m-%d``.
    '''
    dfs = []
    for file_dir in file_dirs:
        df = pd.read_csv(file_dir)

        # Work on the file name only, so underscores or "csv" fragments in
        # parent directories cannot leak into the date token. Everything from
        # ".csv" onwards is dropped (this also covers e.g. ".csv.gz").
        file_name = os.path.basename(file_dir)
        date = re.sub(r'\.csv.*$', '', file_name.rpartition('_')[2])

        # Prefer the containing directory name as the city: a substring match
        # against the whole path mislabels files when an ancestor directory
        # happens to contain a city name (e.g. "LA" inside "LAB").
        parent = os.path.basename(os.path.dirname(file_dir))
        if parent in group:
            city = parent
        else:
            # Fallback for layouts that are not <city>/<file>: keep the
            # original substring-based detection.
            city = ''.join(g for g in group if g in file_dir)

        # add date/city markers
        df['date'] = datetime.strptime(date, '%y-%m-%d')
        df['city'] = city
        dfs.append(df)
    return dfs
# Load every city's CSV files; each resulting frame is tagged with date/city.
dfs = read_data(get_dir(data_dir, cities), cities)
# Stack all per-file frames row-wise into one frame with a fresh index.
df = pd.concat(dfs, ignore_index=True, sort=False)
print("Dataframe shape:{}".format(df.shape))
# The two expressions below only display output when run as notebook cells.
df.head()
df['city'].value_counts()
def mapbox_express(dataframe):
    '''Show an animated Plotly Express Mapbox scatter plot for a DataFrame.

    The Mapbox access token is read from the ``.mapbox_token`` file, resolved
    relative to the current working directory.

    Args:
        dataframe: Frame with ``latitude``, ``longitude``, ``room_type``,
            ``price``, ``date`` and ``city`` columns. Points are coloured by
            ``room_type``, sized by ``price`` and animated over ``date``. The
            plot title is the ``city`` value of the first row, so the frame
            must not be empty.
    '''
    # Close the token file deterministically, and drop surrounding whitespace
    # (typically a trailing newline) so it is not sent as part of the token.
    with open(".mapbox_token") as token_file:
        px.set_mapbox_access_token(token_file.read().strip())
    fig = px.scatter_mapbox(
        dataframe,
        lat="latitude",
        lon="longitude",
        color="room_type",
        size="price",
        title=dataframe['city'].values[0],
        color_continuous_scale=px.colors.cyclical.IceFire,
        size_max=15,
        zoom=10,
        animation_frame="date",
    )
    fig.show()
# Draw one animated map per city.
for city in cities:
    temp_df = (
        df.loc[df['city'] == city]
        # convert datetime to string so it can be used as the animation frame
        .assign(date=lambda frame: frame['date'].astype(str))
        # order rows by the stringified date
        .sort_values(by=['date'])
    )
    mapbox_express(temp_df)